\documentclass{beamer}
\usepackage{amsmath}
%\usepackage{alltt}
%\usepackage{textcomp}
\usepackage{hyperref}


% Select the Montpellier theme; the \mode<presentation> wrapper limits this
% choice to beamer's presentation modes.
\mode<presentation>
{
\usetheme{Montpellier}
}

%turn off navigation symbols
%\setbeamertemplate{navigation symbols}{
%    \usebeamerfont{footline}
%    \usebeamercolor[fg]{footline}
    %\hspace{1em}
%    \insertframenumber/\inserttotalframenumber
%}

% Append a "current / total" frame counter after the existing navigation
% symbols (empty pre-text, counter as post-text), set in the footline font.
\addtobeamertemplate{navigation symbols}{}{ \hspace{1em} \usebeamerfont{footline} \insertframenumber / \inserttotalframenumber }

% Define an "image" variant of the itemize bullet that draws ghidraRight.png
% at a height of 3ex, then make it the active itemize bullet.
% Requires ghidraRight.png to be findable on the graphics path at build time.
\defbeamertemplate{itemize item}{image}{\small\includegraphics[height=3ex]{ghidraRight.png}}
\setbeamertemplate{itemize item}[image]


% Custom "section page" template named mysectionpage.
% Optional argument #1 (default: empty) is text typeset above the title box
% in the "section name" font and colour.
% The section title itself (\insertsection) is placed in a centred
% beamercolorbox that uses the "part title" colours with 12pt of padding.
\defbeamertemplate{section page}{mysectionpage}[1][]{%
\hyperlinksectionstart{}
\begin{centering}
{\usebeamerfont{section name}\usebeamercolor[fg]{section name}#1}
\vskip1em\par
\begin{beamercolorbox}[sep=12pt, center]{part title}
\usebeamerfont{section title}\insertsection\par
\end{beamercolorbox}
\end{centering}
}

% Activate the custom section page defined above.
\setbeamertemplate{section page}[mysectionpage]
%\setbeamertemplate{section page}{}

% Custom "subsection page" template named mysubsectionpage.
% Optional argument #1 (default: empty) is text typeset above the title box
% in the "subsection name" font and colour.
% The subsection title (\insertsubsection) is placed in a centred
% beamercolorbox that uses the "part title" colours with 8pt of padding.
\defbeamertemplate{subsection page}{mysubsectionpage}[1][]{%
\begin{centering}
{\usebeamerfont{subsection name}\usebeamercolor[fg]{subsection name}#1}
\vskip1em\par
\begin{beamercolorbox}[sep=8pt, center]{part title}
\usebeamerfont{subsection title}\insertsubsection\par
\end{beamercolorbox}
\end{centering}
}

% Activate the custom subsection page defined above.
\setbeamertemplate{subsection page}[mysubsectionpage]
%\setbeamertemplate{subsection page}{}

% At the start of each \section, insert a "Contents" frame (only in the
% beamer mode, via the <beamer> overlay specification) containing a table of
% contents configured by the options passed to \tableofcontents below.
% The empty optional argument supplies no alternative text for starred
% sections -- NOTE(review): confirm against the beamer user guide.
\AtBeginSection[]
{
\begin{frame}<beamer>{Contents}
\hyperlinksectionstart{}
\tableofcontents[currentsection,hideothersubsections,sectionstyle=show/hide]
\end{frame}
}



% Title-page metadata. The short-form optional arguments are left empty.
% The subtitle also embeds the GHIDRA_1.png logo (scaled to 0.5) beneath the
% subtitle text, separated by a manual 0.2in vertical skip.
\title[] % (optional, use only with long paper titles)
{Improving Disassembly and Decompilation}
\subtitle[]
{or \\  Moderately Advanced Ghidra Usage \\ \vspace{.2in}\includegraphics[scale=0.5]{GHIDRA_1.png}}
 

%\titlegraphic{\includegraphics[scale=0.4]{GHIDRA_1.png}}
\date{}


\begin{document}

\begin{frame}
\titlepage
\end{frame}

\author{}
\title{}
\begin{frame}
\frametitle{Table of Contents}
\tableofcontents[sections={1-5},hideallsubsections]
\end{frame}
 
\begin{frame}
\frametitle{Table of Contents}
\tableofcontents[sections={6-},hideallsubsections]
\end{frame}

\section{Intro and Setup}

\subsection{Introduction}
\begin{frame}
\begin{block}{Intro}   
\begin{itemize}
\item Like any SRE tool, Ghidra makes assumptions which sometimes need to be adjusted by reverse engineers.
\item These slides describe techniques for recognizing problematic situations and steps you can take to improve Ghidra's analysis.
\item These slides assume basic familiarity with Ghidra.
\item Note: the materials for the ``Beginner'' and ``Intermediate'' Ghidra classes are included with the Ghidra distribution. 
\end{itemize}
\end{block}
\end{frame}

\subsection{Setup}
\begin{frame}
\begin{block}{Setup}
\begin{itemize}
\item First, create a new project for the example files used by these slides. 
\item Next, import the files.  They are located in \small \textbf{$\langle$ghidra\_dir$\rangle$/docs/GhidraClass/ExerciseFiles/Advanced} \normalsize
\item The easiest way to do this is to use the Batch Importer 
\item[](\textbf{File} $\rightarrow$ \textbf{Batch Import...} from the Project Window). 
\end{itemize}
\end{block}
\end{frame}

\section{Improving Disassembly}

\subsection{Evaluating Analysis: The Entropy and Overview Sidebars}

\begin{frame}
\begin{block}{Evaluation} 
\begin{itemize}
\item Use the entropy and overview sidebars to get a quick sense of how well a binary has been analyzed/disassembled. 
\item For instance, the entropy sidebar can tell you whether your binary has regions which are likely encrypted or compressed.
\item To activate these sidebars, use the dropdown menu in the Listing (immediately to the right of the camera icon).
\end{itemize}
\end{block}
\end{frame}

\subsection{Non-Returning Functions}
\begin{frame}
\begin{block}{Non-returning Functions}
\begin{itemize}
\item Some functions, like \textbf{exit} or \textbf{abort}, are \textbf{non-returning functions}.  Such functions do not return to the caller after executing. Instead, they
do drastic things like halting the execution of the program. 
\item Suppose \textbf{panic} is a function that does not return. The compiler is free to put whatever it wants (e.g., data) after calls to \textbf{panic}.
\item If Ghidra does not know that \textbf{panic} is non-returning, it will assume that bytes after calls to \textbf{panic} are instructions and attempt to disassemble them.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Non-returning Functions}
\begin{itemize}
\item The \textbf{Non-Returning Functions - Known} analyzer recognizes a number of standard non-returning functions by name and automatically handles them correctly.
\item The \textbf{Non-Returning Functions - Discovered} analyzer attempts to discover non-returning functions by gathering evidence during disassembly.
\item If a non-returning function manages to slip by these analyzers, it can wreak havoc on analysis.  Fortunately, there are ways to recognize and fix this situation.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Non-returning Functions}
\begin{enumerate}
\item Open and analyze the file \textbf{noReturn}. Note: for all exercises, use the default analyzers unless otherwise specified.
\item Open the \textbf{Bookmarks} window and examine the \textbf{Error} bookmarks. There should be two errors.
\item These errors are due to one non-returning function that Ghidra doesn't know about.  Identify this function and mark it as non-returning (right-click on the name of the function in
the decompiler, select \textbf{Edit Function Signature}, and then check the \textbf{No Return} box).
\item Verify that the errors are corrected after marking the function as non-returning.
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Non-returning Functions}
\begin{itemize}
\item[] (advance for solutions)
\pause
\item The function \textbf{loopForever} is non-returning. 
\item Note: You can configure how much evidence the \textbf{Non-Returning Functions - Discovered} analyzer requires before deciding that a function is non-returning via 
\textbf{Analysis} $\rightarrow$ \textbf{Auto Analyze ...} from the Code Browser. If you lower the evidence threshold, this analyzer will mark \textbf{loopForever} as
non-returning.
\item Also, the script \textbf{FixupNoReturnFunctionsScript.java} will analyze a program and present a list of potentially non-returning functions.
It will also allow you to mark a function as non-returning and repair any damage.
\end{itemize}
\end{block}
\end{frame}


\subsection{Function Start Patterns}

\begin{frame}
\begin{block}{Finding Functions}
\begin{itemize}
\item Ghidra uses many techniques to find bytes to disassemble and to group instructions together into function bodies.
\item One such technique is to search for \textbf{function start patterns}.  These are patterns of bits (with wildcards allowed) that indicate that a particular address is likely the
start of a function.
\item These patterns are based on two facts:
\begin{enumerate}
\item Functions often start in similar ways (e.g., setting up the stack pointer, saving callee-saved registers)
\item Similar things occur immediately before a function start (return of previous function, padding bytes,...)
\end{enumerate}
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Finding Functions}
\begin{itemize}
\item Ghidra has an experimental extension for finding additional functions in a program by training models on the functions that have already been found.
\item To use it, first enable the \textbf{MachineLearning} extension from the Project Window via \textbf{File} $\rightarrow$ \textbf{Install Extensions...}
\item Restart Ghidra, then ensure that the \textbf{RandomForestFunctionFinderPlugin} is enabled in the Code Browser 
(\textbf{File} $\rightarrow$ \textbf{Configure...} then click on the plug icon in the upper right). 
\item Then select \textbf{Search} $\rightarrow$ \textbf{For Code and Functions...} from the Code Browser.
\end{itemize}
\end{block}
\end{frame}


\begin{frame}
\begin{block}{Finding Functions}
\begin{itemize}
\item The general strategy is to train several models using different choices of parameters, then select and apply the best one.  See the help for details.
\item Another useful feature is the \textbf{Disassembled View} (accessed through the \textbf{Window} menu of the Code Browser).  This allows you to see what the bytes at the current
address would disassemble to without actually disassembling them.
\end{itemize}
\end{block}
\end{frame}


\section{Improving Decompilation: Data Types}

\subsection{Defining Structures}

\begin{frame}
\begin{block}{Defining Data Types}
\begin{itemize}
\item One of the best ways to clean up the decompiled code is to apply data types.
\item You can define types manually through the \textbf{Data Type Manager}.
\item You can also have Ghidra help you by right-clicking on a variable in the decompiler view and selecting 
\begin{itemize}
\item \textbf{Auto Create (Class) Structure}, or 
\item \textbf{Auto Fill in (Class) Structure}.
\end{itemize}
\item Note: If you happen to have a C header file, you can parse data types from it by selecting \textbf{File} $\rightarrow$ \textbf{Parse C Source...} 
from the Code Browser (doesn't support C++ header files yet).
\end{itemize}
\end{block}
\end{frame}

\begin{frame}[fragile]
\begin{block}{Exercise: Auto-creating Structures}
\begin{enumerate}
\item Open and analyze the file \textbf{createStructure}.
\item[] This file contains two functions of interest: \textbf{setFirstAndThird} and \textbf{setSecondAndFourth}. 
\item[] The first parameter to each of these functions has type \textbf{exampleStruct *}, where \textbf{exampleStruct} is defined as follows:
\item[] \begin{verbatim}
          typedef struct {
              long a;
              int b;
              char *c;
              short d;
          } exampleStruct;
\end{verbatim}
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Auto-creating Structures}
\begin{enumerate}
\setcounter{enumi}{1}
\item Navigate to \textbf{setFirstAndThird}. 
\item In the decompiler view, change the type of the second parameter to \textbf{long} and the third parameter to \textbf{char *}
\item In the decompiler view, right-click on \textbf{param\_1} and select \textbf{Auto Create Structure}.
\item Right-click on the default structure name (\textbf{astruct}) in the decompiler and select \textbf{Edit Data Type}.
\item Change the name of the structure to \textbf{exampleStruct}, then name the field at offset 0x0 \textbf{a} and the field at offset 0x10 \textbf{c}.
\item Note that this isn't all of the fields in the structure, just the ones that were used in this function.
\item[] (continued)
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Auto-creating Structures}
\begin{enumerate}
\setcounter{enumi}{7}
\item Now navigate to \textbf{setSecondAndFourth}.
\item Change the type of the first parameter to \textbf{exampleStruct *}, the type of the second to \textbf{int}, and the type of the third to \textbf{short}.
\item Right-click on the first parameter and select \textbf{Auto Fill in Structure}.
\item Edit the structure again to add the names from the structure definition for the new fields (you can also select each field in the decompiler and press \textbf{L}).
\item Revel in how much better the decompilation of the two functions looks!
\end{enumerate}
\end{block}
\end{frame}

\subsection{Defining Classes}
\begin{frame}
\begin{block}{Defining Classes}
\begin{itemize}
\item If a variable is known to be a \textbf{this} parameter, right-clicking on it will yield a menu with the option \textbf{Auto Fill in Class Structure} instead 
of \textbf{Auto Fill in Structure}.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Defining Classes}
\begin{enumerate}
\item Open and analyze the file \textbf{animals}.
\item In the Listing, press \textbf{G} (goto).  In the resulting pop-up, enter \textbf{getAnimalAge}.  \item This will bring up a search results window where you can 
select between the two functions with the name \textbf{getAnimalAge} (the functions are in different namespaces).
\item[] Note: There are other windows, such as the \textbf{Functions} window, in which the namespace column does not appear by default.  You can add it by right-clicking 
on any column name and selecting \textbf{Add/Remove Columns...}  You can also configure the display of certain columns by right-clicking on the column name and selecting \textbf{Column Settings...}
\item[] (continued)
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Defining Classes}
\begin{enumerate}
\setcounter{enumi}{3}
\item Select \textbf{Dog::getAnimalAge} in the pop-up. This will cause the Code Browser to navigate to \textbf{Dog::getAnimalAge()}.
\item[] Note: Alternatively, you can quickly navigate to the functions in a class using the \textbf{Classes} folder of the \textbf{Symbol Tree}.
\item Verify that in the decompiler view, right-clicking on the token \textbf{this} yields a menu with \textbf{Auto Fill in Class Structure} as an option.  
Note that Ghidra has already created an empty structure named \textbf{Dog}.
\end{enumerate}
\end{block}
\end{frame}

\subsection{Decompiling Virtual Function Calls}

\begin{frame}[fragile]
\begin{block}{Exercise: Virtual Function Tables}
\begin{enumerate}
\item  Here is what the end of \textbf{main} looks like in the source code:
\label{snippet}
\item[]
\begin{verbatim}
    Animal *a;
    ...
    a->printInfo();   //non-virtual
    a->printSound();  //virtual
    a->printSpecificFact(); //virtual
    int animalAge = a->getAnimalAge(); //virtual
    delete(a);
    return animalAge;
\end{verbatim}
\item[] Navigate to the function \textbf{main} and examine Ghidra's decompilation.
\item[] (continued)
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Virtual Function Tables}
\begin{enumerate}
\setcounter{enumi}{1}
\item The task is to get the names of the virtual functions to show up in the decompiler. At a high level, the steps are:
\begin{itemize}
\item For each virtual function \textbf{foo} of the class \textbf{Animal}, create a function definition, which is a data type representing the signature of \textbf{foo}.
\item Create a data type for the vftable of \textbf{Animal}. This data type will be a structure whose fields are the function signature data types (in order).
\item Change the first field of the \textbf{Animal} data type to be a pointer to the vftable data type.
\end{itemize}
\item[] (continued)
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Virtual Function Tables}
\begin{enumerate}
\setcounter{enumi}{2}
\item First, create a function definition for each of the virtual functions 
\begin{itemize}
\item \textbf{void printSound(void)}
\item \textbf{void printSpecificFact(void)}
\item \textbf{int getAnimalAge(void)}
\end{itemize}
by right-clicking on \textbf{animals} in the \textbf{Data Type Manager} and selecting \textbf{New} $\rightarrow$ \textbf{Function Definition...} 
\item[] For each function, enter the signature and select \textbf{\_\_thiscall} for the calling convention.
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Virtual Function Tables}
\begin{enumerate}
\setcounter{enumi}{3}
\item Now, right-click on \textbf{animals} in the \textbf{Data Type Manager} and select \textbf{New} $\rightarrow$ \textbf{Structure...}
\item Give the new structure the name \textbf{Animal\_vftable}.
\item Fill in the structure with the data types corresponding to the virtual functions of the class \textbf{Animal}.  You can do this by double-clicking 
on an entry in the \textbf{DataType} column and entering the name used when creating a function definition.  
\item[] Notes:
\begin{itemize}
\item The order of the functions in the vftable is the same as the order they are called in the source code snippet.
\item Be sure to give each field in the vftable structure a name (use the name of the corresponding virtual function).
\end{itemize}
\item[] (continued)
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Virtual Function Tables}
\begin{enumerate}
\setcounter{enumi}{6}
\item Alternatively:
\begin{itemize}
\item Find the vftable for \textbf{Animal} (from the Code Browser, \textbf{Search}~$\rightarrow$~\textbf{For Address Tables...}) and look for the table consisting of pointers to 
\textbf{\_\_cxa\_pure\_virtual}.
\item Apply the three function definition data types to the pointers in the table in the appropriate order.
\item Select the table in the Listing, right-click, \textbf{Data}~$\rightarrow$~\textbf{Create Structure}
\end{itemize}
\item In main, re-type the variable passed to \textbf{printInfo} to have type \textbf{Animal *} and rename it to \textbf{a}. Note that this will eliminate the
cast to \textbf{Animal *} of the argument passed to \textbf{printInfo}.
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Virtual Function Tables}
\begin{enumerate}
\setcounter{enumi}{8}
\item Right-click on \textbf{a} and select \textbf{Auto Fill in Structure} (note that this does not say \textbf{Auto Create Structure} since Ghidra automatically created a default empty \textbf{Animal} structure).
\item Finally, edit the \textbf{Animal} structure itself so that the first field is an element of type \textbf{Animal\_vftable *} with name \textbf{Animal\_vftable}.
\item Verify that the virtual function names appear in the decompilation of \textbf{main}.
\end{enumerate}
\end{block}
\end{frame}

\section{Improving Decompilation: Function Calls}

\subsection{Introduction}
\begin{frame}
\begin{block}{Function Signatures and Calls}
\begin{itemize}
\item In this section, we focus on issues involving function signatures and function calls.
\end{itemize}
\end{block}
\end{frame}

\subsection{Function Signatures: Listing vs. Decompiler}
\begin{frame}
\begin{block}{Refresher on Function Signatures in Ghidra:}
\begin{itemize}
\item In order to decompile \textbf{foo}, the decompiler needs to know the signatures of \textbf{foo} and any callees.
\item If a needed signature has been saved to the program database by the user or by a ``high confidence'' analyzer (e.g., recognized as a library function), the
decompiler will use the saved signature.
\item Otherwise, the decompiler will apply local heuristics to determine any needed signatures.  In this case, the signature of \textbf{foo} in the decompiler
can differ from the one shown in the Listing, and two different calls to \textbf{bar} within \textbf{foo} could have different signatures.   
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Refresher on Function Signatures in Ghidra:}
\begin{itemize}
\item The default signature shown in the Listing is created when the function is (re-)created.  This is the signature that is stored in the Ghidra program database 
(possibly with low confidence).
\item To save the signature shown in the decompiler, right-click in the decompiler window and select \textbf{Commit Params/Return}. 
\item Note that editing a function's signature manually, from either the Listing or the decompiler, commits the new signature to the program database.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Refresher on Function Signatures in Ghidra:}
\begin{itemize}
\item To save the names of the local variables of a function to the program database, right-click in the decompiler window and select \textbf{Commit Local Names}. 
\item Note that this action does not commit the type of the local variables. You can re-type a local variable to save the type, but oftentimes it is better to let
the decompiler figure out the types of local variables on its own.  See the ``Forcing Data-types'' entry in the Ghidra help for more information. 

\end{itemize}
\end{block}
\end{frame}



\subsection{The Decompiler Parameter ID Analyzer}
\begin{frame}
\begin{block}{Decompiler Parameter ID}
\begin{itemize}
\item The \textbf{Decompiler Parameter ID Analyzer} (\textbf{Analysis} $\rightarrow$ \textbf{One Shot} $\rightarrow$ \textbf{Decompiler Parameter ID}) uses the decompiler and an
exploration of the call tree to determine parameter, return type, and calling convention information about functions in a program.  This analyzer can be quite useful when 
you have some rich type information, such as known types from library calls.  However, if you run this analyzer too early or before fixing problems, you can end up propagating 
bad information all over the program.
\item Note: this analyzer will commit the signature of each function.
\end{itemize}
\end{block}
\end{frame}

\subsection{Overriding a Signature at a Call Site}
\begin{frame}
\begin{block}{Overriding Signatures}
\begin{itemize}
\item It is possible to override the signature used at a particular call site.
\item This is basically only ever needed for variadic functions (functions which take a variable number of arguments), or to adjust the arguments of indirect calls.
In other cases you should edit the signature of the called function directly.
\item To override a signature, right-click on the function call in the decompiler and select \textbf{Override Signature}.
\item To remove an override, right-click and select \textbf{Remove Signature Override}.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Aside: The System V AMD64 ABI}
\begin{itemize}
\item For reference when doing the exercises, here is the calling convention used by Linux on x86\_64:
\begin{itemize}
\item First 6 integer/pointer args are passed in \textbf{RDI}, \textbf{RSI}, \textbf{RDX}, \textbf{RCX}, \textbf{R8}, \textbf{R9}.
\item First 8 floating point args are passed in \textbf{XMM0}-\textbf{XMM7}. 
\item Additional args are passed on the stack.
\item For variadic functions, the number of floating point args passed in the \textbf{XMM} registers is passed in \textbf{AL}.
\end{itemize}
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Overriding Signatures}
\begin{enumerate}
\item Open and analyze the file \textbf{override.so}, then navigate to the function \textbf{overrideSignature}.  Override the signature of the call 
to \textbf{printf}, if necessary, using the format string to determine number and types of the parameters to the call. Some of the parameters to 
\textbf{printf} are global variables; determine and apply their types. 
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Overriding Signatures}
\begin{itemize}
\item[] (advance for solution)
\pause
\item Signature:
\item[] ~~~~\textbf{printf(char *,int,long,double,char *,int,int,int,int)}
\item Types:
\item[] ~~~~\textbf{a}: \textbf{int}
\item[] ~~~~\textbf{b}: \textbf{long}
\item[] ~~~~\textbf{c}: \textbf{double}
\item[] ~~~~\textbf{d}: \textbf{char[2]}
\item Note: The \textbf{Variadic Function Signature Override} analyzer will determine and apply the override for you.  It's disabled by default, but you can 
run it as a one-shot analyzer.
\end{itemize}
\end{block}
\end{frame}

\subsection{Custom Calling Conventions}
\begin{frame}
\begin{block}{Custom Calling Conventions}
\begin{itemize}
\item Sometimes a function will use a non-standard calling convention.
\item In such a case, you can edit the calling convention manually.
\item To do this, right-click on the function in the decompiler and select \textbf{Edit Function Signature}.
\item In the resulting window, select \textbf{Use Custom Storage} under \textbf{Function Attributes}.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Custom Calling Conventions}
\begin{enumerate}
\item Open and analyze the file \textbf{custom}, then navigate to the function \textbf{main}. 
\item \textbf{main} calls the functions \textbf{sum} and \textbf{diff}, which have custom calling conventions.
\item Examine the bodies and call sites of \textbf{sum} and \textbf{diff} to determine their signatures and custom calling conventions.
\item Edit each of the two functions and select \textbf{Use Custom Storage}.
\item Type the correct signature into the text window and press Enter.
\item[] (continued)
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Custom Calling Conventions}
\begin{enumerate}
\setcounter{enumi}{5}
\item Each row in the \textbf{Function Variables} table corresponds to a function parameter or return. Click on the entries in the \textbf{Storage} 
column to set the storage for each entry.
\item In the resulting \textbf{Storage Address Editor} window, click \textbf{Add} to add storage, then click on each
table entry to modify. In general, there can be several locations assigned to one parameter. For example, a given parameter might be a structure that is passed
in several registers due to its size.  However, for this exercise, you will only need one location per parameter.
\item You might find it helpful to remove some of the variable references Ghidra adds in the Listing, particularly to stack variables.  To do this, \textbf{Edit} 
$\rightarrow$ \textbf{Tool Options} $\rightarrow$ \textbf{Listing Fields} $\rightarrow$ \textbf{Operands Field} from the Code Browser.
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Custom Calling Conventions}
\begin{itemize}
\item[] (advance for solutions)
\pause
\item \textbf{long sum(long, long)}: return in \textbf{RAX}, args in \textbf{R14, R15}.
\item \textbf{long diff(long, long)}: return in \textbf{RBX}, args in \\ \textbf{[RSP + 0x8]}, \textbf{[RSP + 0x10]}
\end{itemize}
\end{block}
\end{frame}

\subsection{Multiple Storage Locations}
\begin{frame}
\begin{block}{Multiple Storage Locations}
\begin{itemize}
\item As mentioned previously, you can add multiple storage locations for a single parameter or return when editing a function signature.
\item A relatively common use of this is for functions that return \textbf{register pairs}.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Multiple Storage Locations}
\begin{enumerate}
\item Open and analyze the file \textbf{ldiv}, then navigate to the function \textbf{main}.
\item In the decompiler, right-click on the call to \textbf{ldiv} and select \textbf{Edit Function Signature}.  How does \textbf{ldiv} use multiple storage locations for a function variable?
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Multiple Storage Locations}
\begin{itemize}
\item[] (advance for solution)
\pause
\item The result of \textbf{ldiv} is returned in the register pair \textbf{RDX:RAX} (\textbf{RAX} contains the quotient, \textbf{RDX} contains the remainder). 
\end{itemize}
\end{block}
\end{frame}

\subsection{Inlining Functions}

\begin{frame}
\begin{block}{Inlining Functions}
\begin{itemize}
\item Some special functions have side effects that the decompiler needs to know about for correct decompilation. You can handle this situation by marking them
as \textbf{inline}. 
\item If \textbf{foo} is marked as inline, calls to \textbf{foo} will be replaced by the body of \textbf{foo} during decompilation.
\item To mark \textbf{foo} as inline, edit \textbf{foo}'s signature and check the \textbf{In~Line} function attribute.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Inlining Functions}
\begin{itemize}
\item Inlining a function is related to the notion of a \textbf{call fixup}, where calls to certain functions are replaced with snippets of Pcode.  
\item Note that the Pcode \textbf{CALL} op is replaced, which can be just part of the semantics of a native call instruction.
\item These functions are recognized by name and have the call fixup applied automatically. 
\item Examples include functions related to structured exception handling in Windows.
\item You can also select from pre-defined call fixups when editing a function signature.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Inlining Functions}
\begin{enumerate}
\item Open and analyze the file \textbf{inline}, then navigate to the function \textbf{main}.
\item When provided with the correct number of command line arguments, this function should parse \textbf{argv[1]} and \textbf{argv[2]} into unsigned long values and print their sum.
The task is to get the decompiler to show this.
\item First, ensure that \textbf{main} has the correct signature \\ (\textbf{int main(int argc, char **argv)}).
\item Next, override the signature of the call to \textbf{printf} if necessary, so that it agrees with the format string.
\item[] (continued) 
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Inlining Functions}
\begin{enumerate}
\setcounter{enumi}{4}
\item The decompilation will still be incorrect. Marking \textbf{adjustStack} and \textbf{restoreStack} as inline yields correct decompilation.  Why?
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Inlining Functions}
\begin{itemize}
\item[] (advance for solutions)
\pause
\item \textbf{adjustStack} decreases the stack pointer by 8 (total), which violates the calling convention Ghidra assigned to it by default. This discrepancy leads to incorrect analysis. If you mark \textbf{adjustStack} and \textbf{restoreStack} as inline, their bodies will be incorporated into \textbf{main} during decompilation and the changes to the stack pointer will be tracked.
\end{itemize}
\end{block}
\end{frame}

\subsection{System Calls}
\begin{frame}
\begin{block}{System Calls}
\begin{itemize}
\item \textbf{System calls} are a way for a program to request a service from the operating system.
\item Examples include process control, file management, device management,\ldots
\item A typical implementation uses a special native instruction along with a designated register, which we'll call the
\textbf{system call register}.
\item When the special instruction is executed, the value in the system call register determines which function is called.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: System Calls}
\begin{enumerate}
\item Open and analyze the file \textbf{write}, then navigate to \textbf{main}.
\item[] Note: \textbf{main} prints \texttt{Hello World!} to the screen using the \textbf{write} system call.
\end{enumerate}
\begin{itemize}
\item Before going further, let's examine what we see.
\begin{itemize}
\item In the decompiler, you should see \textbf{syscall()}, which looks like a function call but isn't (try clicking on it). 
\item This is an example of a \textbf{user-defined Pcode op}.
\item These operations show up as \textbf{CALLOTHER} Pcode ops in the Pcode field in the Listing.  They can have inputs and outputs, but otherwise are treated
as black boxes by the decompiler.
\item Such operations are used, for example, for machine instructions that can't be modeled exactly in Pcode.  
\end{itemize}
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: System Calls}
\begin{enumerate}
\setcounter{enumi}{1}
\item In the decompiler, why is the return value of \textbf{main} \texttt{undefined [16]}?
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: System Calls}
\begin{itemize}
\item[] (advance for solutions)
\pause
\item The \textbf{SYSCALL} instruction is translated to a single \textbf{CALLOTHER} Pcode op (named \textbf{syscall}).  The decompiler does not consider this operation to have any 
side effects, so when it tries to automatically determine the return type it sees a move to \textbf{RDX} and a move to \textbf{RAX} before the \textbf{RET} instruction.  
These registers form a register pair for this architecture, so the decompiler thinks the return value is 16 bytes.
\item So how do we improve the decompilation?
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: System Calls}
\begin{itemize}
\item This system call is a call to \textbf{write} since 1 is written to the system call register (\textbf{RAX}) before the \textbf{syscall} 
instruction is executed (search online for ``x64 Linux syscall table'').
\item We'd like the call to \textbf{write} to appear with the correct name, signature, and calling convention.
\item We'd also like cross references, so that we can easily see all calls to \textbf{write}.
\item During execution, the code for the \textbf{write} function is somewhere in the kernel and not in the program's address space.
\item So what should the call target be in Ghidra?
\item Answer: use \textbf{overlay blocks} on the \textbf{OTHER} space.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: System Calls}
\begin{itemize}
\item Prior to Ghidra 9.1, the \textbf{OTHER} space was used to store data from a binary that does not get loaded into memory, such as the \texttt{.comment} section of an ELF file.
\item In 9.1, we've extended the ability to make references into the \textbf{OTHER} space.
\item You can't use this space directly, but you can create \textbf{overlay blocks} on the \textbf{OTHER} space.
\item Overlays are a (sort of old school) technique to allow different blocks to be swapped in and out at the same address.  
\item For our purposes, they allow us to put things in an artificial memory space without the possibility of conflicting with other uses of that space.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: System Calls}
\begin{enumerate}
\setcounter{enumi}{2}
\item Create an overlay of the \textbf{OTHER} space as follows:
\begin{enumerate}[(i)]
\item Bring up the \textbf{Memory Map} by clicking on the RAM chip icon in the tool bar of the Code Browser.
\item Click on the green plus to add a block.
\item In the resulting dialog, name the block \textbf{syscall\_block}.  Have it start at address 0x0 of the \textbf{OTHER} space and have length 0x1000.
Check the \textbf{Overlay} and \textbf{Artificial} boxes. 
\end{enumerate}
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: System Calls}
\begin{enumerate}
\setcounter{enumi}{3}
\item Next, go to address 0x1 in \textbf{syscall\_block} and create a function (in the Listing, select both the address and the \texttt{??} and press \textbf{F}).
\item Edit this new function to give it the name \textbf{write} and the \textbf{syscall} calling convention.
\item If you happen to know the parameters and their types you can add them.  Alternatively, select the new function \textbf{write} in the Code Browser, right-click on 
\textbf{generic\_clib\_64} in the \textbf{Data Type Manager}, and select \textbf{Apply Function Data Types}.
\item[] Note: the function we've created has no body. It's essentially an address to store a function signature and to get cross-references.
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: System Calls}
\begin{enumerate}
\setcounter{enumi}{6}
\item Now, navigate back to the \textbf{SYSCALL} instruction in \textbf{main}. 
\item Click on the instruction in the Listing, then press \textbf{R} to bring up the \textbf{Reference Manager}.
\item Click the green plus to add a reference. Check the \textbf{Include OTHER Overlay Spaces} box then select \textbf{syscall\_block} in the drop-down. 
\item Enter 0x1 for the ``To Address'' and for the Ref-Type select \textbf{CALLOTHER\_CALL\_OVERRIDE}. This reference type essentially transforms the \textbf{CALLOTHER} Pcode op to a \textbf{CALL} op before sending the Pcode to the decompiler.  The call target is the ``To Address''
of the reference.
\item[] The decompilation should now look as expected.
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{System Call Notes}
\begin{enumerate}
\item The script \textbf{ResolveX86orX64LinuxSyscallScript.java} will do all of this for you. You can run it on this file, but a better demonstration is to run it on a 
libc shared object file.
\item The script uses the \textbf{Symbolic Propagator} to determine the value of a register at a particular location.
\item The script requires a mapping from system call numbers to system call names.  The x86 and x64 ones come with Ghidra; you will need to supply others.
\item[] (continued)
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{System Call Notes}
\begin{enumerate}
\setcounter{enumi}{3}
\item Also, the signatures of most Linux system calls are included with Ghidra (used in step 6 above).  The script shows you how to apply function data types programmatically,
but you might have to supply your own data type archive.
\item \textbf{CALLOTHER\_CALL\_OVERRIDE} references should not be applied to \textbf{CALLOTHER} ops with an output (see the Ghidra help for details).
\end{enumerate}
\end{block}
\end{frame}

\subsection{Program Specification Extensions}
\begin{frame}
\begin{block}{Program Specification Extensions}
\begin{itemize}
\item Calling conventions, call fixups, and callother fixups are normally defined in \texttt{.cspec} files.  They are available to any program imported with the associated compiler spec.
\item It is also possible to define them in XML files and import them into individual programs as \textbf{Program Specification Extensions}.
\item The definitions in a specification extension XML file should look like those in a \texttt{.cspec} file, i.e., you can create an XML file by finding an appropriate definition in 
a \texttt{.cspec} file then excising and modifying it. 
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Callfixup Extensions}
\begin{enumerate}
\item Delete the existing \textbf{inline} Ghidra program, then import and analyze the \textbf{inline} executable again.
\item Ensure that \textbf{main} has the correct signature and apply the correct overriding signature at the call to \textbf{printf}.
\item Using a text editor, create the following two XML files:
\item[] (continued)
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}[fragile]
\begin{block}{Exercise: Callfixup Extensions}
\begin{verbatim}
<callfixup name="adjustStack">
    <pcode>
     <body><![CDATA[
       RSP = RSP - 8;
     ]]></body>
    </pcode>
  </callfixup>
\end{verbatim}
File: \texttt{adjust.xml}
\end{block}
\end{frame}

\begin{frame}[fragile]
\begin{block}{Exercise: Callfixup Extensions}
\begin{verbatim}
<callfixup name="restoreStack">
    <pcode>
     <body><![CDATA[
       RSP = RSP + 24;
     ]]></body>
    </pcode>
  </callfixup>
\end{verbatim}
File: \texttt{restore.xml}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Callfixup Extensions}
\begin{enumerate}
\setcounter{enumi}{3}
\item Select \textbf{Edit $\rightarrow$ Options for `inline' $\rightarrow$ Specification Extensions}.
\item Import and apply \textbf{adjust.xml} and \textbf{restore.xml}.
\item Navigate to \textbf{adjustStack} and \textbf{restoreStack} and apply the appropriate call fixups.
\end{enumerate}
\end{block}
\end{frame}


\section{Improving Decompilation: Control Flow}

\subsection{Fixing Switch Statements}
\begin{frame}
\begin{block}{Fixing Switch Statements}
\begin{itemize}
\item Sometimes you will see warnings in the decompiler view stating that there are too many branches to recover a jumptable. 
\item One reason for this is that there actually is a jumptable, but the decompiler can't determine bounds on the switch variable.
\item In such cases, you can add the jump targets manually and then run the script \textbf{SwitchOverride.java}. 
\item Note: To find such locations in a program, run the script \textbf{FindUnrecoveredSwitchesScript.java}.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Fixing Switch Statements}
\begin{enumerate}
\item Open and analyze the file \textbf{switch}, then navigate to the function \textbf{main}. The decompiler view should contain a warning about an unrecovered jumptable.
\item The global variable \textbf{array} is the jumptable. 
\item Navigate to \textbf{array} in the Listing and press \textbf{P} to define the first element to be a pointer.  Note: this will clear any data type information that Ghidra assigned to 
\textbf{array} automatically.
\item Now press \textbf{[} to define an array.  Enter 10 for the number of elements. 
\item This will trigger disassembly at each of the addresses in the jumptable, but these addresses are not yet part of the function \textbf{main}.
\item[] (continued)
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Fixing Switch Statements}
\begin{enumerate}
\setcounter{enumi}{5}
\item Navigate to the \textbf{JMP} instruction which jumps to \textbf{array} + an offset.
\item Press \textbf{R} to bring up the References Editor and click on the mnemonic (\textbf{JMP}).
\item You can use the green plus to add a \textbf{COMPUTED\_JUMP} reference to each address stored in the jumptable one at a time.
\item Alternatively:
\begin{itemize}
\item Select the \textbf{JMP} instruction
\item \textbf{Select} $\rightarrow$ \textbf{Forward Refs} from the Code Browser.
\item \textbf{Select} $\rightarrow$ \textbf{Forward Refs} again.
\item Drag the selection onto the References Editor Dialog.
\end{itemize}
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Fixing Switch Statements}
\begin{enumerate}
\setcounter{enumi}{9}
\item Right-click on the label \textbf{main} in the Listing, then select \textbf{Function} $\rightarrow$ \textbf{Re-create Function}.
\item The jump targets are now part of \textbf{main}, which you can verify by examining the Function Graph.
\item Finally, navigate back to the \textbf{JMP} instruction and use the Script Manager to run \textbf{SwitchOverride.java}.
\end{enumerate}
\end{block}
\end{frame}

\subsection{Shared Returns}
\begin{frame}
\begin{block}{Shared Returns}
\begin{itemize}
\item If a function \textbf{callerOne} ends with a call to \textbf{callee}, compilers will sometimes perform an optimization which replaces that final call with a jump.
\item If \textbf{callerOne} and \textbf{callerTwo} both end with calls to \textbf{callee}, this optimization will result in \textbf{callerOne} and \textbf{callerTwo} 
ending with jumps to \textbf{callee}.
\item The \textbf{Shared Return Analyzer} detects this situation and modifies the flow of the jump instruction to have type \textbf{CALL\_RETURN}. This will change how
the functions are displayed in the decompiler.
\item You can also do this manually, in case the analyzer missed something (for example, if only one of the functions sharing a final call/jump has been found and disassembled). 
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Shared Returns}
\begin{enumerate}
\item Uncheck the \textbf{Shared Return Calls} analyzer before analyzing \textbf{sharedReturn}.
\item This file has been stripped of symbols.  To find \textbf{main}, navigate to \textbf{entry} and look for the call to \textbf{\_\_libc\_start\_main}. The first argument to this 
call corresponds to the \textbf{main} function in the source code.   
\item \textbf{main} contains two calls to non-library functions.  Each callee contains a \textbf{JMP} instruction corresponding to what was a function call in the source code. 
\item Find these \textbf{JMP} instructions, right-click, select \textbf{Modify Instruction Flow...}, and change the flow to \textbf{CALL\_RETURN}. Verify that a new function call appears 
in the decompilation.
\end{enumerate}
\end{block}
\end{frame}

\subsection{Control Flow Oddities}
\begin{frame}
\begin{block}{Opaque Predicates}
\begin{itemize}
\item One anti-disassembly technique is to create an if-else statement with a condition that always evaluates to the same value, but is complicated enough for this to be difficult to 
determine statically.  
\item  This is an example of an \textbf{opaque predicate}.
\item  The branch that is never taken can contain byte sequences intended to thwart static analysis, such as sequences which disassemble to jumps to invalid targets. 
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Opaque Predicates}
\begin{enumerate}
\item Open and analyze the file \textbf{opaque}, then navigate to the function \textbf{main}.
\item \textbf{main} contains an opaque predicate.  Find it and fix it by either:
\begin{enumerate}[(i)]
\item Changing a conditional jump to an unconditional jump using the instruction patcher.  To patch an instruction, right-click on it in the Listing and select \textbf{Patch Instruction}.
\item Adding a (primary) reference with Ref-Type \textbf{JUMP\_OVERRIDE\_UNCONDITIONAL} on the appropriate conditional jump. The ``To Address'' of the reference should be the jump target. 
 To the decompiler, this will change the conditional jump to an unconditional jump.
\end{enumerate}
\item[] (hint on next slide)
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Opaque Predicates}
\begin{itemize}
\item Hint: The opaque predicate is based on the fact that if you square an integer and reduce mod 4, you can only ever get 0 or 1.  Look for a multiplication, modular reduction (optimized to a bitmask), and comparison in the assembly.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Jumps Within Instructions}
\begin{itemize}
\item The decompiler can repeatedly disassemble the same byte as part of different instructions as it follows flow.
\item In the Listing, however, a given byte is assigned to at most one instruction by default.
\item One consequence is that the decompilation can be correct even if the Listing shows a disassembly error.
\item This can happen when encountering certain anti-disassembly techniques.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Jumps Within Instructions}
\begin{enumerate}
\item Open and analyze the file \textbf{jumpWithinInstruction}, then navigate to the function \textbf{main}.
\item You should see an error in the disassembly but correct decompilation (with a warning).  What's going on?
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Jumps Within Instructions}
\begin{itemize}
\item[] (advance for solutions)
\pause
\item \textbf{eb ff} is \textbf{JMP~.+1}.  After this instruction executes, \textbf{ff~c0} are the bytes of the next instruction to execute, but this is not clear from the Listing.  
\item In Ghidra:
\begin{itemize}
\item Right-click on the \textbf{JMP} instruction and select \textbf{Modify Instruction Length...}  Change the length to 1.
\item Right-click on \textbf{main()} in the Listing, select \textbf{Function} $\rightarrow$ \textbf{Re-create Function}.
\end{itemize} 
\item Note: The script \textbf{FixOffcutInstructionScript.java} applies length overrides as needed to fix certain errors (such as those related to conditional jumps to skip \textbf{LOCK} prefixes).  See the script description for details.
\end{itemize}
\end{block}
\end{frame}

\section{Improving Decompilation: Data Mutability}

\subsection{Changing Data Mutability}

\begin{frame}
\begin{block}{Data Mutability}
\begin{itemize}
\item \textbf{Data Mutability} refers to the assumptions Ghidra makes regarding whether a particular data element can change.
\item There are four data mutability settings:
\begin{enumerate}
\item normal
\item constant
\item volatile
\item writable
\end{enumerate}
\item There are two ways to change data mutability:
\begin{enumerate}
\item Right-click on the (defined) data in the Listing and select \textbf{Data} $\rightarrow$ \textbf{Settings...} 
\item Set the mutability of an entire block of memory through the Memory Map (\textbf{Window} $\rightarrow$ \textbf{Memory Map} from the Code Browser).
\end{enumerate}
\end{itemize}
\end{block}
\end{frame}

\subsection{Constant Data}

\begin{frame}
\begin{block}{Constant Data}
\begin{itemize}
\item The decompiler will display the contents of a memory location if the contents are marked as constant.
\item Otherwise it will display a pointer to the location.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Constant Data}
\begin{enumerate}
\item Open and analyze the file \textbf{dataMutability}, then navigate to the function \textbf{main}.
\item Change the settings of the target of the pointer variable \textbf{writeable} to constant by right-clicking and selecting \textbf{Data} $\rightarrow$ \textbf{Settings...} 
in the Listing.  Verify that the changes are reflected in the decompiler.
\item Restore the data mutability and change it again by modifying the permissions of the appropriate block in the Memory Map. 
\end{enumerate}
\end{block}
\end{frame}

\subsection{Volatile Data}
\begin{frame}
\begin{block}{Volatile Data}
\begin{itemize}
\item Marking a data element as volatile tells the decompiler to assume that the value of a variable could change at any time.
\item This can prevent certain simplifications.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Volatile Data}
\begin{enumerate}
\item Note that the decompiler prints warning comments at the top of \textbf{main} indicating that unreachable code blocks have been removed.
\item You can prevent this by selecting \textbf{Edit} $\rightarrow$ \textbf{Tool Options} $\rightarrow$ \textbf{Decompiler} $\rightarrow$ \textbf{Analysis} and unchecking
\textbf{Eliminate unreachable code} (there's also a button in the decompiler toolbar).
\item After doing this, you will see the global variable \textbf{status} appear in the decompilation.  In the Listing, note that it is set to zero and then tested.  This is a hint that
\textbf{status} might be volatile.
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Volatile Data}
\begin{enumerate}
\setcounter{enumi}{3}
\item Mark the data element labelled \textbf{status} as volatile and verify that additional code appears in the decompilation 
of the function \textbf{main} (make sure to re-enable unreachable code elimination in the decompiler if you've disabled it). 
\item Note: You might have to override the signature on the call to \textbf{printf} to get all of its arguments to appear in the decompilation.
\end{enumerate}
\end{block}
\end{frame}

\section{Improving Decompilation: Setting Register Values}

\subsection{How and Why to Set Register Values}
\begin{frame}
\begin{block}{Setting Register Values}
\begin{itemize}
\item Setting a context register (for example, to select ARM or Thumb mode) is a common reason to set register values in Ghidra.
\item Additionally, if you set a register value at the beginning of a function, the value will be sent to the decompiler.
\item To set a register value, right-click on an address in the Listing and select \textbf{Set Register Values...}
\item This can be helpful if a register is used to store a global variable.  Additionally, it can sometimes be helpful to set register values when trying to 
understand a function.  The decompiler will perform additional transformations, which may yield a simplified view of how the function behaves in restricted cases.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Global Variables in Registers}
\begin{enumerate}
\item Open and analyze the file \textbf{globalRegVars.so}, then navigate to the function \textbf{initRegisterPointerVar}.  
\item This function stores the address of a global variable into a register.  Determine the address and the register.
\item Set the value of the register to be the address at the beginning of the functions \textbf{setRegisterPointerVar} and \textbf{getRegisterPointerVar}. If you do it correctly,
\textbf{getRegisterPointerVar} should decompile to 
\item[]\textbf{\{}
\item[]\textbf{~~~~return c;}
\item[]\textbf{\}}
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise: Simplifying Transformations}
\begin{enumerate}
\item Open and analyze the file \textbf{setRegister}, then navigate to the function \textbf{switchFunc}.  Set the switch variable (in \textbf{RDI}) to a few 
different values and observe the effect on the decompiled code (recall that you must set the register value at the function entry point for it to be sent
to the decompiler).
\end{enumerate}
\end{block}
\end{frame}


\section{Troubleshooting Decompilation}

\subsection{Identifying Problems in the Decompiled Code}

\begin{frame}
\begin{block}{\textbf{in\_}, \textbf{unaff\_}, and \textbf{extraout\_}}
\begin{itemize}
\item Occasionally, you may see variables in the decompiler view whose names begin with \textbf{in\_}, \textbf{unaff\_}, or \textbf{extraout\_}.
\item \textbf{in\_} or \textbf{unaff\_}: this typically indicates that a register is read before it is written (and it does not contain a parameter passed to the function).
\item Variables that begin with \textbf{extraout\_} can occur when the decompiler thinks that a value is being used that should have been killed by a call. 
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Pcode in the Decompiler View}
\begin{itemize}
\item Occasionally, you might see Pcode operations in the decompiler code. 
\item Examples: \textbf{ZEXT, SEXT, SUB, CONCAT, \dots}
\item See the ``Decompiler'' section in the Ghidra help. 
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Return Addresses Assigned to Local Variables}
\begin{itemize}
\item Another indication of an error when decompiling \textbf{foo} is a line such as
\item[] \textbf{uVar1 = 0x12345678} 
\item[] where 0x12345678 is an address in the body of \textbf{foo}.  This usually means that there's a problem with the decompiler's stack analysis.
\end{itemize}
\end{block}
\end{frame}

\subsection{Potential Causes}
\begin{frame}
\begin{block}{Potential Causes}
\begin{enumerate}
\item The decompiler has a function signature or calling convention wrong (for the function being decompiled or one of its callees). 
\begin{itemize}
\item A common situation is some kind of size mismatch, for example, the decompiler thinks that a call returns a 32-bit value but sees all of 
\textbf{RAX} being used. But then where did the high 32 bits come from?
\end{itemize}
\item There's a register that actually contains a global parameter or is set as the side effect of a called function.
\item There's a function that should be marked as non-returning.
\end{enumerate}
\end{block}
\end{frame}

\subsection{Potential Fixes}
\begin{frame}
\begin{block}{Potential Fixes}
\begin{itemize}
\item To fix these issues, the first step is to try to determine if the decompiler is using something false (either the result of a heuristic or something saved to the program).
\item Oftentimes, you can correct such errors by:
\begin{itemize}
\item correcting function signatures
\item correcting sizes of data types
\item marking functions as inline
\item marking functions as non-returning.
\end{itemize}
\item For example, if you see \textbf{in\_RAX} in the decompiled view, you should check if there's a call to a function whose return type is mistakenly treated as \textbf{void}.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Useful Tools}
\begin{itemize}
\item Script: \textbf{FindPotentialDecompilerProblems.java}: Decompiles all functions in a program, looks for problems, and displays them in a navigable table.
\item Script: \textbf{CompareFunctionSizesScript.java}: Decompiles all functions in a program and displays a table which contains the size of each function 
(in instructions) and the size of each decompiled function (in Pcode operations). If a function has many instructions but the decompiled version is small, 
there could be an incorrect assumption regarding the return value.
\end{itemize}
\end{block}
\end{frame}


\begin{frame}
\begin{block}{Useful Tools}
\begin{itemize}
\item Script: \textbf{DecompilerStackProblemsFinderScript.java}: Decompiles all functions in a program and displays information about any local variables assigned
values that are also addresses within the corresponding function's body.
\item From the Code Browser, \textbf{Edit} $\rightarrow$ \textbf{Tool Options...} $\rightarrow$ \textbf{Decompiler} $\rightarrow$ \textbf{Analysis} 
$\rightarrow$ uncheck \textbf{Eliminate unreachable code}: might help diagnose issues.
\end{itemize}
\end{block}
\end{frame}


\subsection{Compiler vs. Decompiler}
\begin{frame}
\begin{block}{Compiler vs. Decompiler}
\begin{itemize}
\item Sometimes compilers can prove certain facts about special cases and use these facts to emit optimized code.
\item This can have consequences for the decompiled code.
\item This isn't an error, just something to keep in mind. 
\end{itemize}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Exercise}
\begin{enumerate}
\item Open and analyze the file \textbf{compilerVsDecompiler}.
\item The functions \textbf{calls\_memcmp} and \textbf{calls\_memcmp\_fixed\_len} implement \textbf{memcmp} using the \textbf{CMPSB.REPE} instruction.
\item Compare the decompiled view of these two functions.  What differences do you see?
\item What accounts for these differences? (hint: examine the assembly code)
\item Note: To compare two functions side-by-side, bring up the \textbf{Functions} window (\textbf{Window} $\rightarrow$ \textbf{Functions} from the Code Browser), select
the two functions, right-click and select \textbf{Compare Functions}.  Use the tabs to switch between the Listing and Decompiler views.
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Solution}
\begin{enumerate}
\item[] (advance for solutions)
\pause 
\item \textbf{calls\_memcmp\_fixed\_len} contains \textbf{in\_ZF} and \textbf{in\_CF} in the decompiled code, whereas \textbf{calls\_memcmp} does not.
\item In \textbf{calls\_memcmp\_fixed\_len}, the compiler knows that the loop will be executed at least once (\textbf{RCX} is set to 8).
\item However, in \textbf{calls\_memcmp}, the loop might be executed 0 times (\textbf{RCX} is set to \textbf{param3}).
\item This means that the compiler must initialize the flags \textbf{ZF} and \textbf{CF} in \textbf{calls\_memcmp}, but does not have to in \textbf{calls\_memcmp\_fixed\_len}, since
the loop is guaranteed to execute at least once and that comparison will set the flags. 
\item[] (continued)
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}
\begin{block}{Solution}
\begin{enumerate}
\setcounter{enumi}{4}
\item This is the purpose of the \textbf{CMP RDX,RDX} instruction in \textbf{calls\_memcmp} (which does not occur in \textbf{calls\_memcmp\_fixed\_len}).
\item The decompiler doesn't do the analysis to prove that a loop must execute at least once.
\item So in the decompiler's view, the values in \textbf{ZF} and \textbf{CF} at the beginning of \textbf{calls\_memcmp\_fixed\_len} might contribute to the return value (in the ``case''
 when the loop body does not execute).
\end{enumerate}
\end{block}
\end{frame}

\end{document}

